#include <vector>
#include <iostream>
#include <string>
#include <fstream>
#include <map>
#include <algorithm>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstring>
#include <string>
#include <iostream>
#include <fstream>
#include <sys/types.h>
#include <cstdlib>
#include <cfloat>
#include <cstddef>
#include <stdexcept>
#include <cstdio>
#include <map>
#include <sstream>
#include <cctype>
#include <iomanip>
#include <sstream>
#include <vector>
#include <limits>
using namespace std;

#ifdef _MSC_VER
#define LOCAL
#endif
#define t_index int
#define t_float double

class TIMER
{

#ifndef LOCAL
	timeval START;

public:
	TIMER() {
		gettimeofday(&START, 0);
	}
	inline double time() {
		timeval NOW;
		gettimeofday(&NOW, 0);
		return NOW.tv_sec - START.tv_sec + (NOW.tv_usec - START.tv_usec) / 1000000.;
	}
	void showtime()
	{
		cerr << time() << "s" << endl;
	}
#endif
#ifdef LOCAL
	double START;
public:
	TIMER()
	{
		START = clock();
	}
	inline double time()
	{
		return (clock() - START) / 1000.0;
	}
	void showtime()
	{
		cerr << time() << "s" << endl;
	}
#endif
};

#ifdef _MSC_VER 
# define ALIGN16_BEG __declspec(align(16))
# define ALIGN16_END 
#else 
# define ALIGN16_BEG
# define ALIGN16_END __attribute__((aligned(16)))
#endif

struct Seq {
	char *c;
	int len;
	int v_gene;
	int v_all;
	int j_gene;
	int j_all;
	string segkey;
	int *muts;
	int mut_cnt;


	string getSegkey() {
		if (segkey == "") {
			segkey = string(c, c + len) + ((char)v_gene) + ((char)v_all) + ((char)j_gene) + ((char)j_all);
		}
		return segkey;
	}

	Seq() {}
};

struct Seqs {
	string junc;
	//string v_fam;
	string v_gene;
	string v_all;
	string j_gene;
	string j_all;
	map<vector<int>, int> muts;
	Seqs(vector<pair<int, Seq> > &seqs) {
		junc = string(seqs[0].second.c, seqs[0].second.c + seqs[0].second.len);
		v_gene = seqs[0].second.v_gene;
		v_all = seqs[0].second.v_all;
		j_gene = seqs[0].second.j_gene;
		j_all = seqs[0].second.j_all;
		for (int i = 0; i < seqs.size(); i++) {
			//muts[seqs[i].second.muts]++;
		}
	}
};


ALIGN16_BEG __m128 m128d_dp[2][10005] ALIGN16_END;
ALIGN16_BEG __m128 dpstr1[10005] ALIGN16_END;
ALIGN16_BEG __m128 dpstr2[10005] ALIGN16_END;

__m128 LevenshteinDistance(char* s1, char* s2, char* s3, char* s4, char* s5, int n, int m) {
	//for (int i = 0; i <= n; i++) {
	//	m128d_dp[i][0] = _mm_set_ps1(i);
	//}
	for (int i = 0; i <= m; i++) {
		m128d_dp[0][i] = _mm_set_ps1(i);
	}
	for (int i = 0; i < n; i++) {
		dpstr1[i] = _mm_set_ps1(s1[i]);
	}
	for (int i = 0; i < m; i++) {
		dpstr2[i] = _mm_set_ps(s5[i], s4[i], s3[i], s2[i]);
	}

	for (int i = 1; i <= n; i++) {
		m128d_dp[i & 1][0] = _mm_set_ps1(i);
		for (int j = 1; j <= m; j++) {
			m128d_dp[i & 1][j] = _mm_add_ps(_mm_min_ps(m128d_dp[(i & 1) ^ 1][j], m128d_dp[i & 1][j - 1]), _mm_set_ps1(1));
			//ALIGN16_BEG __m128 a1 ALIGN16_END;
			//ALIGN16_BEG __m128 a2 ALIGN16_END;
			//ALIGN16_BEG __m128 a3 ALIGN16_END;
			//ALIGN16_BEG __m128 a4 ALIGN16_END;
			//ALIGN16_BEG __m128 a5 ALIGN16_END;
			//a1 = _mm_cmpeq_ps(dpstr1[i - 1], dpstr2[j - 1]);
			//a2 = _mm_andnot_ps(a1, _mm_set_ps1(1));
			//a3 = _mm_add_ps(m128d_dp[i - 1][j - 1], a2);
			//a4 = _mm_min_ps(m128d_dp[i][j], a3);


			m128d_dp[i & 1][j] = _mm_min_ps(m128d_dp[i & 1][j],
				_mm_add_ps(m128d_dp[(i & 1) ^ 1][j - 1],
				_mm_andnot_ps(
				_mm_cmpeq_ps(dpstr1[i - 1], dpstr2[j - 1])
				, _mm_set_ps1(1))
				));
		}
	}

	return m128d_dp[n & 1][m];
}

double GetLD(Seq& s1, Seq& s2) {
	int R = 0;
	for (int i = 0; i < s1.len; i++) {
		R += s1.c[i] != s2.c[i];
	}
	return R;
}



double vCompare(Seq& s1, Seq& s2) {
	if (s1.v_gene != s2.v_gene) {
		return 8;
	}
	if (s1.v_all != s2.v_all) {
		return 1;
	}
	return 0;
}



double jCompare(Seq& s1, Seq& s2) {
	if (s1.j_gene != s2.j_gene) {
		return 8;
	}
	if (s1.j_all != s2.j_all) {
		return 1;
	}
	return 0;
}




double SharedMuts(Seq& s1, Seq& s2) {
	double bonus = 0.;
	size_t p1 = 0, p2 = 0;
	while (p1 < s1.mut_cnt&& p2 < s2.mut_cnt) {
		if (s1.muts[p1] < s2.muts[p2]) {
			p1++;
		}
		else if (s2.muts[p2] < s1.muts[p1]) {
			p2++;
		}
		else {
			p1++;
			p2++;
			bonus += 0.35;
		}
	}
	return bonus;
}

double SharedMuts(Seq& s1, Seq& s2, int c) {
	double bonus = 0.;
	size_t p1 = 0, p2 = 0;
	while (p1 < s1.mut_cnt&& p2 < s2.mut_cnt) {
		if (s1.muts[p1] < s2.muts[p2]) {
			c--;
			if (c < 0) return bonus;
			p1++;
		}
		else if (s2.muts[p2] < s1.muts[p1]) {
			p2++;
		}
		else {
			p1++;
			p2++;
			bonus += 0.35;
		}
	}
	return bonus;
}

double SharedMuts(const vector<int> &muts1, const vector<int> &muts2) {
	double bonus = 0.;
	size_t p1 = 0, p2 = 0;
	while (p1 < muts1.size() && p2 < muts2.size()) {
		if (muts1[p1] < muts2[p2]) {
			p1++;
		}
		else if (muts1[p1] > muts2[p2]) {
			p2++;
		}
		else {
			p1++;
			p2++;
			bonus += 0.35;
		}
	}
	return bonus;
}

double SharedMuts(Seqs& s1, Seqs& s2) {
	double bonus = 0.;
	int cnt = 0;
	for (auto iter = s1.muts.begin(); iter != s1.muts.end(); iter++) {
		for (auto iter2 = s2.muts.begin(); iter2 != s2.muts.end(); iter2++) {
			cnt += iter->second * iter2->second;
			bonus += SharedMuts(iter->first, iter2->first) * iter->second * iter2->second;
		}
	}
	return bonus / cnt;
}

float* getScore_val[4];
int getScore_pos = 0;
double getScore_vjPenalty[4];
double getScore_mutBonus[4];
double getScore_editLength[4];
double getScore_lenPenalty[4];
Seq* getScore_seq2[4];
Seq* getScore_seq;

void update() {
	if (getScore_pos == 0) {
		return;
	}
	char *junc = getScore_seq->c;
	char *junc2 = getScore_seq2[0]->c;
	char *junc3 = getScore_pos > 1 ? getScore_seq2[1]->c : junc2;
	char *junc4 = getScore_pos > 2 ? getScore_seq2[2]->c : junc2;
	char *junc5 = getScore_pos > 3 ? getScore_seq2[3]->c : junc2;
	__m128 R = LevenshteinDistance(junc, junc2, junc3, junc4, junc5, getScore_seq->len, getScore_seq2[0]->len);
	for (int i = 0; i < getScore_pos; i++) {
		float LD = *(((float*)&R) + i);
		if (getScore_mutBonus[i] >(LD + getScore_vjPenalty[i])) {
			getScore_mutBonus[i] = (LD + getScore_vjPenalty[i] - 0.001);
		}
		*(getScore_val[i]) = (LD + getScore_vjPenalty[i] + getScore_lenPenalty[i] - getScore_mutBonus[i]) / getScore_editLength[i];
	}
	getScore_pos = 0;
}

int ct = 0, ct2 = 0;
double GetScore(Seq& s1, Seq& s2, float* dst, bool use) {

	double vPenalty = vCompare(s1, s2);
	double jPenalty = jCompare(s1, s2);
	double lenPenalty = fabs(0. + s1.len - s2.len) * 2;
	double editLength = (double)min(s1.len, s2.len);
	double maxBonus = 0.35 * min(s1.mut_cnt, s2.mut_cnt);
	if ((vPenalty + jPenalty + lenPenalty * 1.5 - maxBonus) > editLength * 0.5) {
		return 0.5;
	}
	double mutBonus;// = SharedMuts(s1, s2);
	int need = (vPenalty + jPenalty + lenPenalty * 1.5 - editLength * 0.5 + 0.349999) / 0.35;
	if (need > 0) {
		int p = min(s1.mut_cnt, s2.mut_cnt);
		int c = p - need;
		if (1.5 * c < p) {
			mutBonus = SharedMuts(s1, s2, c);
		}
		else {
			mutBonus = SharedMuts(s1, s2);
		}
	}
	else {
		mutBonus = SharedMuts(s1, s2);
	}

	//double dd = SharedMuts(s1, s2);


	//double mutBonus = SharedMuts(s1, s2);
	if ((vPenalty + jPenalty + lenPenalty * 1.5 - mutBonus) > editLength * 0.5) {
		return 0.5;
	}

	if (use && s1.len != s2.len) {
		getScore_val[getScore_pos] = dst;
		getScore_vjPenalty[getScore_pos] = jPenalty + vPenalty;
		getScore_mutBonus[getScore_pos] = mutBonus;
		getScore_editLength[getScore_pos] = editLength;
		getScore_seq = &s1;
		getScore_seq2[getScore_pos] = &s2;
		getScore_lenPenalty[getScore_pos] = lenPenalty;
		getScore_pos++;
		return 0;
	}

	int LD = 0;
	for (int i = 0; i < s1.len; i++) {
		LD += s1.c[i] != s2.c[i];
	}
	if (mutBonus >(LD + vPenalty + jPenalty)) {
		mutBonus = (LD + vPenalty + jPenalty - 0.001);
	}
	return (LD + vPenalty + jPenalty + lenPenalty - mutBonus) / editLength;
}



class Cluster {
public:
	vector<int> ids;
	Cluster(int id) {
		ids.push_back(id);
	}
	Cluster() {}
	void merge(Cluster other) {
		for (auto iter = other.ids.begin(); iter != other.ids.end(); iter++) {
			ids.push_back(*iter);
		}
	}
};

// self-destructing array pointer
template <typename type>
class auto_array_ptr {
private:
	type * ptr;
	auto_array_ptr(auto_array_ptr const &); // non construction-copyable
	auto_array_ptr& operator=(auto_array_ptr const &); // non copyable
public:
	auto_array_ptr()
		: ptr(NULL)
	{ }
	template <typename index>
	auto_array_ptr(index const size)
		: ptr(new type[size])
	{ }
	template <typename index, typename value>
	auto_array_ptr(index const size, value const val)
		: ptr(new type[size])
	{
		std::fill_n(ptr, size, val);
	}
	~auto_array_ptr() {
		delete[] ptr;
	}
	void free() {
		delete[] ptr;
		ptr = NULL;
	}
	template <typename index>
	void init(index const size) {
		ptr = new type[size];
	}
	template <typename index, typename value>
	void init(index const size, value const val) {
		init(size);
		std::fill_n(ptr, size, val);
	}
	inline operator type *() const { return ptr; }
};
struct node {
	t_index node1, node2;
	t_float dist;

	/*
	inline bool operator< (const node a) const {
	return this->dist < a.dist;
	}
	*/

	inline friend bool operator< (const node a, const node b) {
		return (a.dist < b.dist);
	}
};

class cluster_result {
private:
	auto_array_ptr<node> Z;
	t_index pos;

public:
	cluster_result(const t_index size)
		: Z(size)
		, pos(0)
	{}

	void append(const t_index node1, const t_index node2, const t_float dist) {
		Z[pos].node1 = node1;
		Z[pos].node2 = node2;
		Z[pos].dist = dist;
		++pos;
	}

	node * operator[] (const t_index idx) const { return Z + idx; }

	/* Define several methods to postprocess the distances. All these functions
	are monotone, so they do not change the sorted order of distances. */

	void sqrt() const {
		for (node * ZZ = Z; ZZ != Z + pos; ++ZZ) {
			ZZ->dist = ::sqrt(ZZ->dist);
		}
	}

	void sqrt(const t_float) const { // ignore the argument
		sqrt();
	}

	void sqrtdouble(const t_float) const { // ignore the argument
		for (node * ZZ = Z; ZZ != Z + pos; ++ZZ) {
			ZZ->dist = ::sqrt(2 * ZZ->dist);
		}
	}

#ifdef R_pow
#define my_pow R_pow
#else
#define my_pow pow
#endif

	void power(const t_float p) const {
		t_float const q = 1 / p;
		for (node * ZZ = Z; ZZ != Z + pos; ++ZZ) {
			ZZ->dist = my_pow(ZZ->dist, q);
		}
	}

	void plusone(const t_float) const { // ignore the argument
		for (node * ZZ = Z; ZZ != Z + pos; ++ZZ) {
			ZZ->dist += 1;
		}
	}

	void divide(const t_float denom) const {
		for (node * ZZ = Z; ZZ != Z + pos; ++ZZ) {
			ZZ->dist /= denom;
		}
	}
};

class doubly_linked_list {
	/*
	Class for a doubly linked list. Initially, the list is the integer range
	[0, size]. We provide a forward iterator and a method to delete an index
	from the list.

	Typical use: for (i=L.start; L<size; i=L.succ[I])
	or
	for (i=somevalue; L<size; i=L.succ[I])
	*/
public:
	t_index start;
	auto_array_ptr<t_index> succ;

private:
	auto_array_ptr<t_index> pred;
	// Not necessarily private, we just do not need it in this instance.

public:
	doubly_linked_list(const t_index size)
		// Initialize to the given size.
		: start(0)
		, succ(size + 1)
		, pred(size + 1)
	{
		for (t_index i = 0; i<size; ++i) {
			pred[i + 1] = i;
			succ[i] = i + 1;
		}
		// pred[0] is never accessed!
		//succ[size] is never accessed!
	}

	~doubly_linked_list() {}

	void remove(const t_index idx) {
		// Remove an index from the list.
		if (idx == start) {
			start = succ[idx];
		}
		else {
			succ[pred[idx]] = succ[idx];
			pred[succ[idx]] = pred[idx];
		}
		succ[idx] = 0; // Mark as inactive
	}

	bool is_inactive(t_index idx) const {
		return (succ[idx] == 0);
	}
};

// Indexing functions
// D is the upper triangular part of a symmetric (NxN)-matrix
// We require r_ < c_ !
vector<Seq> seqs;
#define D_(r_,c_) ( D[(static_cast<std::ptrdiff_t>(2*N-3-(r_))*(r_)>>1)+(c_)-1] )
//#define D_(r_,c_) (( D[(static_cast<std::ptrdiff_t>(2*N-3-(r_))*(r_)>>1)+(c_)-1]) > 0 ? D[(static_cast<std::ptrdiff_t>(2*N-3-(r_))*(r_)>>1)+(c_)-1] : D[(static_cast<std::ptrdiff_t>(2*N-3-(r_))*(r_)>>1)+(c_)-1] = GetScore(seqs[r_],seqs[c_]))
// Z is an ((N-1)x4)-array
#define Z_(_r, _c) (Z[(_r)*4 + (_c)])

inline static void f_average(float * const b, const t_float a, const t_float s, const t_float t) {
	*b = s*a + t*(*b);
	/*#ifndef FE_INVALID
	#if HAVE_DIAGNOSTIC
	#pragma GCC diagnostic push
	#pragma GCC diagnostic ignored "-Wfloat-equal"
	#endif
	if (fc_isnan(*b)) {
	throw(nan_error());
	}
	#if HAVE_DIAGNOSTIC
	#pragma GCC diagnostic pop
	#endif
	#endif*/
}

template <typename t_members>
static void NN_chain_core(const t_index N, float * const D, t_members * const members, cluster_result & Z2, vector<int> &len) {
	/*
	N: integer
	D: condensed distance matrix N*(N-1)/2
	Z2: output data structure

	This is the NN-chain algorithm, described on page 86 in the following book:

	Fionn Murtagh, Multidimensional Clustering Algorithms,
	*/
	vector<int> QQ;

	t_index i;

	auto_array_ptr<t_index> NN_chain(N);
	t_index NN_chain_tip = 0;

	t_index idx1, idx2;

	t_float size1, size2;
	doubly_linked_list active_nodes(N);

	t_float min;


#ifdef FE_INVALID
	if (feclearexcept(FE_INVALID)) throw fenv_error();
#endif

	for (t_index j = 0; j<N - 1; ++j) {
		if (NN_chain_tip <= 3) {
			NN_chain[0] = idx1 = active_nodes.start;
			NN_chain_tip = 1;

			idx2 = active_nodes.succ[idx1];
			min = D_(idx1, idx2);
			for (i = active_nodes.succ[idx2]; i<N; i = active_nodes.succ[i]) {
				if ((len[i] - len[idx1]) / len[idx1] > 0.6 / 2) {
					break;
				}
				if (D_(idx1, i) < min) {
					min = D_(idx1, i);
					idx2 = i;
				}
			}
		}  // a: idx1   b: idx2
		else {
			NN_chain_tip -= 3;
			idx1 = NN_chain[NN_chain_tip - 1];
			idx2 = NN_chain[NN_chain_tip];
			min = idx1 < idx2 ? D_(idx1, idx2) : D_(idx2, idx1);
		}  // a: idx1   b: idx2

		do {
			NN_chain[NN_chain_tip] = idx2;

			for (i = active_nodes.start; i<idx2; i = active_nodes.succ[i]) {
				if (D_(i, idx2) < min) {
					min = D_(i, idx2);
					idx1 = i;
				}
			}
			for (i = active_nodes.succ[idx2]; i<N; i = active_nodes.succ[i]) {

				if (D_(idx2, i) < min) {
					min = D_(idx2, i);
					idx1 = i;
				}
			}

			idx2 = idx1;
			idx1 = NN_chain[NN_chain_tip++];

		} while (idx2 != NN_chain[NN_chain_tip - 2]);

		Z2.append(idx1, idx2, min);

		if (idx1>idx2) {
			t_index tmp = idx1;
			idx1 = idx2;
			idx2 = tmp;
		}


		size1 = static_cast<t_float>(members[idx1]);
		size2 = static_cast<t_float>(members[idx2]);
		members[idx2] += members[idx1];

		// Remove the smaller index from the valid indices (active_nodes).
		active_nodes.remove(idx1);


		if (min > 0.32 && j < N-3) {
			QQ.push_back(idx2);
			active_nodes.remove(idx2);
			j++;
			continue;
		}

		t_float s = size1 / (size1 + size2);
		t_float t = size2 / (size1 + size2);
		for (i = active_nodes.start; i<idx1; i = active_nodes.succ[i])
			f_average(&D_(i, idx2), D_(i, idx1), s, t);
		// Update the distance matrix in the range (idx1, idx2).
		for (; i<idx2; i = active_nodes.succ[i])
			f_average(&D_(i, idx2), D_(idx1, i), s, t);
		// Update the distance matrix in the range (idx2, N).
		for (i = active_nodes.succ[idx2]; i<N; i = active_nodes.succ[i])
			f_average(&D_(idx2, i), D_(idx1, i), s, t);

	}
	for (int i = 0; i < QQ.size(); i++) {
		Z2.append(QQ[i], idx2, 1);
	}

#ifdef FE_INVALID
	if (fetestexcept(FE_INVALID)) throw fenv_error();
#endif
}


/*
Lookup function for a union-find data structure.

The function finds the root of idx by going iteratively through all
parent elements until a root is found. An element i is a root if
nodes[i] is zero. To make subsequent searches faster, the entry for
idx and all its parents is updated with the root element.
*/
class union_find {
private:
	auto_array_ptr<t_index> parent;
	t_index nextparent;

public:
	union_find(const t_index size)
		: parent(size>0 ? 2 * size - 1 : 0, 0)
		, nextparent(size)
	{ }

	t_index Find(t_index idx) const {
		if (parent[idx] != 0) {
			t_index p = idx;
			idx = parent[idx];
			if (parent[idx] != 0) {
				do {
					idx = parent[idx];
				} while (parent[idx] != 0);
				do {
					t_index tmp = parent[p];
					parent[p] = idx;
					p = tmp;
				} while (parent[p] != idx);
			}
		}
		return idx;
	}

	void Union(const t_index node1, const t_index node2) {
		parent[node1] = parent[node2] = nextparent++;
	}
};


class linkage_output {
private:
	t_float * Z;

public:
	linkage_output(t_float * const Z_)
		: Z(Z_)
	{}

	void append(const t_index node1, const t_index node2, const t_float dist,
		const t_float size) {
		if (node1<node2) {
			*(Z++) = static_cast<t_float>(node1 + 1e-3);
			*(Z++) = static_cast<t_float>(node2 + 1e-3);
		}
		else {
			*(Z++) = static_cast<t_float>(node2 + 1e-3);
			*(Z++) = static_cast<t_float>(node1 + 1e-3);
		}
		*(Z++) = dist;
		*(Z++) = size;
	}
};

#define size_(r_) ( ((r_<N) ? 1 : Z_(r_-N,3)) )

template <const bool sorted>
static void generate_SciPy_dendrogram(t_float * const Z, cluster_result & Z2, const t_index N) {
	// The array "nodes" is a union-find data structure for the cluster
	// identities (only needed for unsorted cluster_result input).
	union_find nodes(sorted ? 0 : N);
	if (!sorted) {
		std::stable_sort(Z2[0], Z2[N - 1]);
	}

	linkage_output output(Z);
	t_index node1, node2;

	for (node const * NN = Z2[0]; NN != Z2[N - 1]; ++NN) {
		// Get two data points whose clusters are merged in step i.
		if (sorted) {
			node1 = NN->node1;
			node2 = NN->node2;
		}
		else {
			// Find the cluster identifiers for these points.
			node1 = nodes.Find(NN->node1);
			node2 = nodes.Find(NN->node2);
			// Merge the nodes in the union-find data structure by making them
			// children of a new node.
			nodes.Union(node1, node2);
		}
		output.append(node1, node2, NN->dist, size_(node1) + size_(node2));
	}
}

/** The number of link stats (for the inconsistency computation) for each
cluster. */

#define CPY_NIS 4

/** The column offsets for the different link stats for the inconsistency
computation. */
#define CPY_INS_MEAN 0
#define CPY_INS_STD 1
#define CPY_INS_N 2
#define CPY_INS_INS 3

/** The number of linkage stats for each cluster. */
#define CPY_LIS 4

/** The column offsets for the different link stats for the linkage matrix. */
#define CPY_LIN_LEFT 0
#define CPY_LIN_RIGHT 1
#define CPY_LIN_DIST 2
#define CPY_LIN_CNT 3

#define CPY_MAX(_x, _y) ((_x > _y) ? (_x) : (_y))
#define CPY_MIN(_x, _y) ((_x < _y) ? (_x) : (_y))

#define NCHOOSE2(_n) ((_n)*(_n-1)/2)

#define CPY_BITS_PER_CHAR (sizeof(unsigned char) * 8)
#define CPY_FLAG_ARRAY_SIZE_BYTES(num_bits) (CPY_CEIL_DIV((num_bits), \
                                                          CPY_BITS_PER_CHAR))
#define CPY_GET_BIT(_xx, i) (((_xx)[(i) / CPY_BITS_PER_CHAR] >> \
                             ((CPY_BITS_PER_CHAR-1) - \
                              ((i) % CPY_BITS_PER_CHAR))) & 0x1)
#define CPY_SET_BIT(_xx, i) ((_xx)[(i) / CPY_BITS_PER_CHAR] |= \
                              ((0x1) << ((CPY_BITS_PER_CHAR-1) \
                                         -((i) % CPY_BITS_PER_CHAR))))
#define CPY_CLEAR_BIT(_xx, i) ((_xx)[(i) / CPY_BITS_PER_CHAR] &= \
                              ~((0x1) << ((CPY_BITS_PER_CHAR-1) \
                                         -((i) % CPY_BITS_PER_CHAR))))

#ifndef CPY_CEIL_DIV
#define CPY_CEIL_DIV(x, y) ((((double)x)/(double)y) == \
                            ((double)((x)/(y))) ? ((x)/(y)) : ((x)/(y) + 1))
#endif

#ifdef CPY_DEBUG
#define CPY_DEBUG_MSG(...) fprintf(stderr, __VA_ARGS__)
#else
#define CPY_DEBUG_MSG(...)
#endif

void form_flat_clusters_from_monotonic_criterion(const double *Z,
	const double *mono_crit,
	int *T, double cutoff, int n) {
	int *curNode;
	int ndid, lid, rid, k, ms, nc;
	unsigned char *lvisited, *rvisited;
	double max_crit;
	const double *Zrow;
	const int bff = CPY_FLAG_ARRAY_SIZE_BYTES(n);

	curNode = (int*)malloc(n * sizeof(int));
	lvisited = (unsigned char*)malloc(bff);
	rvisited = (unsigned char*)malloc(bff);

	/** number of clusters formed so far. */
	nc = 0;
	/** are we in part of a tree below the cutoff? .*/
	ms = -1;
	k = 0;
	curNode[k] = (n * 2) - 2;
	memset(lvisited, 0, bff);
	memset(rvisited, 0, bff);
	ms = -1;
	while (k >= 0) {
		ndid = curNode[k];
		Zrow = Z + ((ndid - n) * CPY_LIS);
		lid = (int)Zrow[CPY_LIN_LEFT];
		rid = (int)Zrow[CPY_LIN_RIGHT];
		max_crit = mono_crit[ndid - n];
		CPY_DEBUG_MSG("cutoff: %5.5f maxc: %5.5f nc: %d\n", cutoff, max_crit, nc);
		if (ms == -1 && max_crit <= cutoff) {
			CPY_DEBUG_MSG("leader: i=%d\n", ndid);
			ms = k;
			nc++;
		}
		if (lid >= n && !CPY_GET_BIT(lvisited, ndid - n)) {
			CPY_SET_BIT(lvisited, ndid - n);
			curNode[k + 1] = lid;
			k++;
			continue;
		}
		if (rid >= n && !CPY_GET_BIT(rvisited, ndid - n)) {
			CPY_SET_BIT(rvisited, ndid - n);
			curNode[k + 1] = rid;
			k++;
			continue;
		}
		if (ndid >= n) {
			if (lid < n) {
				if (ms == -1) {
					nc++;
					T[lid] = nc;
				}
				else {
					T[lid] = nc;
				}
			}
			if (rid < n) {
				if (ms == -1) {
					nc++;
					T[rid] = nc;
				}
				else {
					T[rid] = nc;
				}
			}
			if (ms == k) {
				ms = -1;
			}
		}
		k--;
	}

	free(curNode);
	free(lvisited);
	free(rvisited);
}

void get_max_dist_for_each_cluster(const double *Z, double *max_dists, int n) {
	int *curNode;
	int ndid, lid, rid, k;
	unsigned char *lvisited, *rvisited;
	const double *Zrow;
	double max_dist;
	const int bff = CPY_FLAG_ARRAY_SIZE_BYTES(n);

	k = 0;
	vector<int> _curNode(n);
	curNode = &_curNode[0]; //(int*)malloc(n * sizeof(int));
	vector<unsigned char> _lvisited(bff);
	vector<unsigned char> _rvisited(bff);

	lvisited = &_lvisited[0];//(unsigned char*)malloc(bff);
	rvisited = &_rvisited[0];//(unsigned char*)malloc(bff);
	curNode[k] = (n * 2) - 2;
	memset(lvisited, 0, bff);
	memset(rvisited, 0, bff);
	while (k >= 0) {
		ndid = curNode[k];
		Zrow = Z + ((ndid - n) * CPY_LIS);
		lid = (int)Zrow[CPY_LIN_LEFT];
		rid = (int)Zrow[CPY_LIN_RIGHT];
		if (lid >= n && !CPY_GET_BIT(lvisited, ndid - n)) {
			CPY_SET_BIT(lvisited, ndid - n);
			curNode[k + 1] = lid;
			k++;
			continue;
		}
		if (rid >= n && !CPY_GET_BIT(rvisited, ndid - n)) {
			CPY_SET_BIT(rvisited, ndid - n);
			curNode[k + 1] = rid;
			k++;
			continue;
		}
		max_dist = Zrow[CPY_LIN_DIST];
		if (lid >= n) {
			max_dist = CPY_MAX(max_dist, max_dists[lid - n]);
		}
		if (rid >= n) {
			max_dist = CPY_MAX(max_dist, max_dists[rid - n]);
		}
		max_dists[ndid - n] = max_dist;
		CPY_DEBUG_MSG("i=%d maxdist[i]=%5.5f verif=%5.5f\n",
			ndid - n, max_dist, max_dists[ndid - n]);
		k--;
	}
	//free(curNode);
	//free(lvisited);
	//free(rvisited);
}



void form_flat_clusters_from_dist(const double *Z, int *T,
	double cutoff, int n) {

	vector<double >_max_dists(n);
	double *max_dists = &_max_dists[0];//(double*)malloc(sizeof(double) * n);
	get_max_dist_for_each_cluster(Z, max_dists, n);
	//CPY_DEBUG_MSG("cupid: n=%d cutoff=%5.5f MD[0]=%5.5f MD[n-1]=%5.5f\n", n, cutoff, max_dists[0], max_dists[n-2]);
	form_flat_clusters_from_monotonic_criterion(Z, max_dists, T, cutoff, n);
}



void linkage(const size_t N, float* matrix, double* Z, t_index* members, vector<int> &len) {
	//t_index* members = new t_index[N];

	cluster_result Z2(N - 1);
	NN_chain_core<t_index>(N, matrix, members, Z2, len);
	generate_SciPy_dendrogram<false>(Z, Z2, N);
	//delete []members;
}


inline string g(const string&s, int pos) {
	string r = "";
	while (s[pos] != '\"') {
		r += s[pos++];
	}
	return r;
}

inline int gi(const string&s, int pos) {
	int r = 0;
	while (s[pos] != '\"') {
		r = r * 10 + (s[pos++] - '0');
	}
	return r;
}

int *buffer = NULL;
int pos = 10000;

void fast_parse(const vector<string> & antibody, vector<Seq> &out) {
	for (int i = 1; i < antibody.size() - 1;) {
		out.resize(out.size() + 1);
		Seq &seq = out[out.size() - 1];
		i += 2;
		seq.v_all = gi(antibody[i], 14);
		i++;
		seq.v_gene = gi(antibody[i], 15);
		i += 4;
		//seq.id = g(antibody[i], 15);
		i += 2;
		seq.j_all = gi(antibody[i], 14);
		i += 2;
		seq.j_gene = gi(antibody[i], 15);
		i += 2;
		seq.c = (char*)&antibody[i][16];
		seq.len = 0;
		while (*(seq.c + seq.len) != '\"') {
			seq.len++;
		}
		//seq.junc = g(antibody[i], 16);
		i += 4;
		seq.mut_cnt = 0;
		if (antibody[i][4] != '}') {
			if (pos > 9999) {
				pos = 0;
				buffer = (int *)malloc(sizeof(int) * 10000);
			}
			seq.muts = buffer + pos;
			while (antibody[i][6] != '\"') {
				int loc = gi(antibody[i], 18);
				i++;
				//string mut = g(antibody[i], 18);
				int v_mut = antibody[i][18] << 8 | antibody[i][20];
				i += 3;
				seq.muts[seq.mut_cnt++] = loc << 16 | v_mut;
				pos++;
				if (pos == 10000) {
					buffer = (int *)malloc(sizeof(int) * 10000);
					memcpy(buffer, seq.muts, sizeof(int) * seq.mut_cnt);
					pos = seq.mut_cnt;
					seq.muts = buffer;
				}
			}
			i += 3;
		}
		else {
			i += 2;
		}
	}
}

class ABCSpeedup {
public:
	vector <string> cluster(vector <string> &antibody) {

		TIMER timer;


		//vector<Seq> seqs;

		vector<Seq> new_seqs;

		int n = seqs.size();



		seqs.clear();
		fast_parse(antibody, seqs);
		n = seqs.size();


		seqs.resize(n);
		cerr << n << endl;



		map<pair<size_t, string>, vector<pair<int, Seq> > > m;
		for (int i = 0; i < n; i++) {
			m[make_pair(seqs[i].len, seqs[i].getSegkey())].push_back(make_pair(i, seqs[i]));
		}

		vector<int> hash(n);
		vector<Seq> seqs2;
		//vector<Seqs> seqss;
		int c = 0;
		auto_array_ptr<t_index> members;
		members.init(n, 1);
		for (auto iter = m.begin(); iter != m.end(); iter++) {
			//seqss.push_back(Seqs(iter->second));
			seqs2.push_back(iter->second[0].second);
			members[c] = iter->second.size();
			for (int i = 0; i < iter->second.size(); i++) {
				hash[iter->second[i].first] = c;
			}
			++c;
		}


		n = seqs2.size();
		float* dist_matrix = (float *)malloc(sizeof(float) * n * (n - 1) / 2);


		vector<double> mmin(n, 0.6);

		int pos = 0;
		for (int i = 0; i < n; i++) {
			int j;
			int szi = seqs2[i].len;
			for (j = i + 1; j < n; j++) {

				int szj = seqs2[j].len;
				if ((double)(szj - szi) / szi > 0.6 / 2) {
					break;
				}
				dist_matrix[pos] = GetScore(seqs2[i], seqs2[j], &dist_matrix[pos], true);
				pos++;
				if (getScore_pos == 4 || j + 1 == seqs2.size() || seqs2[j].len != seqs2[j + 1].len) {
					update();
				}
			}
			update();
			for (; j < n; j++) {
				dist_matrix[pos++] = 0.6;
			}
		}


		const double cutoff = 0.32;
		//double *linkage_matrix = new double[4 * (seqs.size() - 1)];

		vector<double> linkage_matrix(4 * (seqs2.size() - 1));
		vector<int> len;
		for (int i = 0; i < n; i++) {
			len.push_back(seqs2[i].len);
		}

		linkage(n, &dist_matrix[0], &linkage_matrix[0], members, len);

		vector<int> flat_cluster(n);

		cerr << n << endl;

		form_flat_clusters_from_dist(&linkage_matrix[0], &flat_cluster[0],
			cutoff, seqs2.size());


		vector<string> R(seqs.size());
		vector<string> vs(flat_cluster.size());
		for (int i = 0; i < flat_cluster.size(); i++) {
			ostringstream os;
			os << flat_cluster[i];
			vs[i] = os.str();
		}
		for (int i = 0; i < seqs.size(); i++) {
			R[i] = vs[hash[i]];
		}

		return R;
	}

};

#ifdef LOCAL
int main() {


	string path = "C:\\Users\\Administrator\\Desktop\\test cases\\";
	string in = "100K.json";
	string out = "1K.out";

	vector<string> vs;
	ifstream fin(path + in);
	char buff[10000];
	while (fin.getline(buff, 10000)) {
		vs.push_back(buff);
	}

	vector<int> v;
	fin.close();
	fin.clear();
	fin.open("C:\\Users\\Administrator\\Desktop\\existing algorithms\\100k.out");
	int p;
	while (fin >> p) {
		v.push_back(p);
	}

	ABCSpeedup abcSpeedup;
	auto R = abcSpeedup.cluster(vs);
	return 0;
	long long n = R.size();

	long long correct = 0;
	long long baseCorrect = 0;
	for (int i = 0; i < R.size(); i++) {

		for (int j = i + 1; j < R.size(); j++) {
			if ((R[i] == R[j]) == (v[i] == v[j])) {
				correct += 1;
			}
			if ((R[i] == R[j]) == (i == j)) {
				baseCorrect += 1;
			}
		}
	}

	double accuracy = max(correct - baseCorrect, 0LL) / ((double)n * (n - 1) / 2 - baseCorrect);
	double rawScore = pow(max(accuracy - 0.99, 0.0) / (1 - 0.99), 4);

	cerr << correct << endl;
	cerr << baseCorrect << endl;
	cerr << n * (n - 1) / 2 << endl;

	cerr << accuracy << endl;
	cerr << rawScore << endl;

	return 0;
}
#endif